import json

import requests
from datetime import datetime
# Listing page that this script downloads and parses.
url = 'https://www.jitashe.org/guide/hottab/'

# Request headers sent with every fetch; they mirror what a desktop
# Chromium-based Edge 108 browser on Windows sends.
headers = {
    'authority': 'www.jitashe.org',

    # Content negotiation and caching.
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'cache-control': 'max-age=0',

    # NOTE(review): hard-coded cookie values, presumably copied from a
    # browser session and likely to go stale -- confirm before relying on them.
    'cookie': 'yGhj_40fe_saltkey=E1dc7C39; yGhj_40fe_lastvisit=1670458019; Hm_lvt_4ad169a3774e8f5be3c7945513632bde=1670461621,1670465229; yGhj_40fe_st_p=0%7C1670473408%7Cab27a95f934edff32b149be9a28a5509; yGhj_40fe_viewid=tid_1391871; yGhj_40fe_lastact=1670474597%09forum.php%09guide; Hm_lpvt_4ad169a3774e8f5be3c7945513632bde=1670474599; yGhj_40fe_lastact=1670474748%09forum.php%09guide',

    # Client hints and fetch metadata.
    'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Microsoft Edge";v="108"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',

    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.42',
}

# Download the listing page using the browser-like headers.
# requests applies no timeout unless one is given, so without this argument a
# stalled server would block the script indefinitely; 10 seconds bounds the
# connect and read waits.
res = requests.get(url, headers=headers, timeout=10)

from lxml import etree

from urllib import  parse

"""
/html/body/div[5]/div/div[2]/div[1]/div/div[3]

/html/body/div[5]/div/div[2]/div[1]/div/div[3]/div[1]/div[2]/a
/html/body/div[5]/div/div[2]/div[1]/div/div[3]/div[1]/div[2]/a
//div[3]/div[4]/div[2]/a

/html/body/div[1]/div[4]/div[3]/div[2]/div/div
"""
# Parse the downloaded HTML text into an lxml element tree.
trees = etree.HTML(res.text)

# Absolute path selecting the elements that the loop further down walks over.
_entry_xpath = '/html/body/div[1]/div[4]/div[3]/div[2]/div/div'
divs = trees.xpath(_entry_xpath)
print(divs)

# Link text found at a second absolute path; printed as debug output.
_link_text_xpath = '/html/body/div[5]/div/div[2]/div[1]/div/div[3]/div[1]/div[2]/a/text()'
divs2 = trees.xpath(_link_text_xpath)
print(divs2)

# print(divs.xpath('.//text()'))


# Print the link text and href found under each of the first 50 selected
# elements. xpath() returns lists, so each print shows a (possibly empty) list.
for div in divs[:50]:
    try:
        title = div.xpath('./div[2]/a/text()')
        target = div.xpath('./div[2]/a/@href')
    except etree.XPathError as exc:
        # Stay best-effort for a single bad entry, but report the failure
        # instead of hiding it. A bare `except` would also swallow
        # KeyboardInterrupt and SystemExit.
        print('skipping entry, XPath evaluation failed:', exc)
        continue
    print(title)
    print(target)
    # Not implemented yet (previously sketched in commented-out code): join
    # each href with the site URL via parse.urljoin and fetch the linked page.


# Timestamp taken when this statement runs, rendered like
# "2022-12-08, 13:05:59" (naive local time, since now() is given no timezone).
date = format(datetime.now(), "%Y-%m-%d, %H:%M:%S")
print(date)


import re

# Empty placeholder string; nothing in the visible part of the file reads it.
text = ""

